home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Celestin Apprentice 5
/
Apprentice-Release5.iso
/
Source Code
/
Libraries
/
DCLAP 6d
/
dclap6d
/
DNet
/
DURL.cpp
< prev
next >
Wrap
Text File
|
1996-07-05
|
11KB
|
425 lines
// DURL
// d.g.gilbert, Sep.94
#include <ncbi.h>
#include <dgg.h>
#include <DFile.h>
#include "DTCP.h"
#include "DNetObject.h"
#include "DGoClasses.h"
#include "DURL.h"
// DURL ------------------------------
DURL::DURL()
{
}
short DURL::IsURL( const char *line, char*& url, long maxline)
{ // adapated from Lynx
char *cp, *lineend;
// don't crash on an empty argument
if (line == NULL || *line == '\0') return(DNetOb::kUnknownProt);
if (maxline) lineend = (char*)line + maxline;
else lineend= StrChr( (char*)line,'\0');
/// god damn over duplication -- combine this w/ NetOb's list of protos
for (cp= (char*)line; cp < lineend && *cp!= ':'; cp++) ;
if (*cp != ':') { url= (char*)line; return(DNetOb::kUnknownProt); }
else if (cp[1] != '/' && cp[2] != '/') {
// these are only ones that don't contain "://"
if (!Nlm_StrNICmp( (url=cp-4),"news",4)) return(DNetOb::kNNTPprot);
else if (!Nlm_StrNICmp( (url=cp-6),"mailto",6)) return(DNetOb::kSMTPprot);
else if (!Nlm_StrNICmp( (url=cp-5),"whois",5)) return(DNetOb::kWhoisprot);
else if (!Nlm_StrNICmp( (url=cp-6),"finger",6)) return(DNetOb::kFingerprot);
else if (!Nlm_StrNICmp( (url=cp-4),"nntp",4)) return(DNetOb::kNNTPprot);
else if (!Nlm_StrNICmp( (url=cp-9),"newspost",8)) return(DNetOb::kUnsupportedProt);
else {
while (cp>line && isalpha(cp[-1])) cp--;
url= cp;
return(DNetOb::kUnknownProt);
}
}
else if (!Nlm_StrNICmp( (url=cp-6),"gopher",6)) return(DNetOb::kGopherprot);
else if (!Nlm_StrNICmp( (url=cp-4),"http",4)) return(DNetOb::kHTTPprot);
else if (!Nlm_StrNICmp( (url=cp-3),"bop",3)) return(DNetOb::kBOPprot);
else if (!Nlm_StrNICmp( (url=cp-3),"pop",3)) return(DNetOb::kPOPprot);
else if (!Nlm_StrNICmp( (url=cp-3),"ftp",3)) return(DNetOb::kFTPprot);
else if (!Nlm_StrNICmp( (url=cp-4),"file",4)) return(DNetOb::kFileprot);
else if (!Nlm_StrNICmp( (url=cp-4),"wais",4)) return(DNetOb::kWAISprot);
else if (!Nlm_StrNICmp( (url=cp-6),"telnet",6)) return(DNetOb::kTelnetprot);
else if (!Nlm_StrNICmp( (url=cp-6),"tn3270",6)) return(DNetOb::kTN3270prot);
else if (!Nlm_StrNICmp( (url=cp-6),"rlogin",6)) return(DNetOb::kUnsupportedProt);
else if (!Nlm_StrNICmp( (url=cp-3),"afs",3)) return(DNetOb::kUnsupportedProt);
else if (!Nlm_StrNICmp( (url=cp-8),"prospero",8)) return(DNetOb::kUnsupportedProt);
// !! Need also to support User-Added protocols via Type/Handler method !
else {
while (cp>line && isalpha(cp[-1])) cp--;
url= cp;
return(DNetOb::kUnknownProt);
}
}
// from Lynx
/* Escape undesirable characters using % HTEscape()
** -------------------------------------
**
** This function takes a pointer to a string in which
** some characters may be unacceptable unescaped.
** It returns a string which has these characters
** represented by a '%' character followed by two hex digits.
**
** In the tradition of being conservative in what you do and liberal
** in what you accept, we encode some characters which in fact are
** allowed in URLs unencoded -- so DON'T use the table below for
** parsing!
**
** Unlike HTUnEscape(), this routine returns a malloced string.
**
*/
static unsigned char isAcceptable[96] =
/* Overencodes */
/* Bit 0 alphaChars -- see HTFile.h
** Bit 1 alphaPlusChars -- as alphaChars but with plus.
** Bit 2 ... pathChars -- as alphaPlusChars but with /
*/
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
{ 0,0,0,0,0,0,0,0,0,0,7,6,0,7,7,4, /* 2x !"#$%&'()*+,-./ */
7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0, /* 3x 0123456789:;<=>? */
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 4x @ABCDEFGHIJKLMNO */
7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7, /* 5X PQRSTUVWXYZ[\]^_ */
0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, /* 6x `abcdefghijklmno */
7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0 }; /* 7X pqrstuvwxyz{\}~ DEL */
#define OKAYCHAR(a,mask) ( a>=32 && a<128 && ((isAcceptable[a-32]) & mask))
char* DURL::EncodeChars( const char* str, unsigned char mask)
{
static char *hexchars = "0123456789ABCDEF";
const char * p;
char * q;
char * result;
int unacceptable = 0;
for (p=str; *p; p++)
if (!OKAYCHAR( (unsigned char)*p, mask))
unacceptable++;
result = (char *) MemNew( p - str + unacceptable + unacceptable + 1);
if (result == NULL) return "";
for (q=result, p=str; *p; p++) {
unsigned char a = *p;
if (!OKAYCHAR(a, mask)) {
*q++ = HEX_ESCAPE;
*q++ = hexchars[a >> 4];
*q++ = hexchars[a & 15];
}
else *q++ = *p;
}
*q++ = 0;
return result;
}
/* Decode %xx escaped characters HTUnEscape()
** -----------------------------
**
** This function takes a pointer to a string in which some
** characters may have been encoded in %xy form, where xy is
** the acsii hex code for character 16x+y.
** The string is converted in place, as it will never grow.
*/
static char from_hex(char c)
{
return c >= '0' && c <= '9' ? c - '0'
: c >= 'A' && c <= 'F'? c - 'A' + 10
: c - 'a' + 10;
}
//static
char* DURL::DecodeChars( char * str)
{
char * p = str;
char * q = str;
if (!str) return "";
while(*p) {
if (*p == HEX_ESCAPE) {
p++;
if (*p) *q = from_hex(*p++) * 16;
if (*p) *q += from_hex(*p++);
q++;
}
else
*q++ = *p++;
}
*q++ = 0;
return str;
}
char* DURL::GetParts( const char* url, long whichparts, long urlsize)
{
char *cp, *ep, sep, *newurl, *line, *newline;
if (url == NULL || *url == '\0') return NULL;
if (!urlsize) urlsize= StrLen(url);
line= (char*) Nlm_MemNew(urlsize+1);
Nlm_MemCopy(line, url, urlsize);
line[urlsize]= 0;
newline= (char*) Nlm_MemNew(urlsize+1);
*newline= 0;
long prot= IsURL( line, newurl);
cp= newurl;
ep= StrChr( newurl+3, ':');
if (ep) ep++;
if (*ep == '/') ep++;
if (*ep == '/') ep++;
if (whichparts & kPartProtocol) {
sep= *ep; *ep= 0;
StrCat( newline, cp);
*ep= sep;
}
cp= ep;
while (isspace(*cp)) cp++;
ep= cp;
while ( *ep && (OKAYCHAR(*ep, alphaPlusChars)) ) ep++;
//while (*ep && (isalnum(*ep) || *ep == '.')) ep++;
while (isspace(*ep)) ep++;
if (whichparts & kPartHost) {
sep= *ep; *ep= 0;
StrCat( newline, cp);
*ep= sep;
}
cp= ep;
if (*cp == ':') { // port num
ep = cp + 1;
while (isspace(*ep)) ep++;
while (isdigit(*ep)) ep++;
if (whichparts & kPartPort) {
sep= *ep; *ep= 0;
StrCat( newline, cp);
*ep= sep;
}
cp= ep;
}
while (isspace(*cp)) cp++;
if (*newline && *cp == '/') {
StrCat( newline, "/");
}
if (whichparts & kPartPath) {
if (*cp == '/') cp++;
StrCat( newline, cp);
}
MemFree( line);
return newline;
}
Boolean DURL::ParseURL( DNetOb* nob, const char *url, long urlsize, Boolean verbatim)
{
char *cp, *ep, *qp, sep, *line, *newurl;
short prot;
Boolean isLocalhost;
if (!nob || url == NULL || *url == '\0') return false;
if (!urlsize) urlsize= StrLen(url);
// can we do this space check safely?
newurl= (char*)url; while (isspace(*newurl)) { newurl++; urlsize--; }
ep= newurl + urlsize - 1; while (isspace(*ep)) { ep--; urlsize--; }
line= (char*) Nlm_MemNew(urlsize+1);
Nlm_MemCopy(line, newurl, urlsize);
line[urlsize]= 0;
prot= IsURL( line, newurl);
if (prot <= DNetOb::kUnknownProt) {
MemFree( line);
return false;
}
else {
// need to drop "", '', <> or other delimiters from url end
// also filter out newlines, controls, and spaces unless "" or ''
{
cp= newurl;
if (cp>line) do { cp--; } while (cp>line && !isgraph(*cp));
ep= cp;
switch (*cp) {
case '"':
newurl= cp+1;
do { if (isprint(*cp)) *ep++= *cp; cp++; } while (*cp && *cp!='"');
*ep= 0;
break;
case '\'':
newurl= cp+1;
do { if (isprint(*cp)) *ep++= *cp; cp++; } while (*cp && *cp!='\'');
*ep= 0;
break;
case '<':
newurl= cp+1;
if (verbatim)
do { if (isprint(*cp)) *ep++= *cp; cp++; } while (*cp && *cp!='>');
else
do { if (isgraph(*cp)) *ep++= *cp; cp++; } while (*cp && *cp!='>');
*ep= 0;
break;
default:
newurl= cp;
if (verbatim)
do { if (isprint(*cp)) *ep++= *cp; cp++; } while (*cp);
else
do { if (isgraph(*cp)) *ep++= *cp; cp++; } while (*cp);
*ep= 0;
break;
}
}
nob->StoreProtocol( prot);
nob->StoreURL(newurl);
cp= StrChr(newurl+3,':');
cp++;
if (*cp == '/') cp++;
if (*cp == '/') cp++;
while (isspace(*cp)) cp++;
if (prot == DNetOb::kSMTPprot) {
DecodeChars(cp);
nob->fType= kMailType; // mailto gopher kind
nob->StorePath(cp);
MemFree( line);
return true;
}
ep= cp;
while ( *ep && (OKAYCHAR(*ep, alphaPlusChars)) ) ep++;
//while (*ep && (isalnum(*ep) || *ep == '.' || *ep == '-')) ep++;
while (isspace(*ep)) ep++;
sep= *ep; *ep= 0;
isLocalhost = (ep - cp < 2); // || StrICmp( cp, "localhost")==0 );
nob->StoreHost( cp);
*ep= sep;
cp= ep; // NO: +1;
if (sep == ':') { // port num
ep= ++cp;
while (isspace(*ep)) ep++;
while (isdigit(*ep)) ep++;
sep= *ep; *ep= 0;
nob->StorePort( cp);
*ep= sep;
cp= ep; //+1;
}
if (sep == '/') ;
while (isspace(*cp)) cp++;
qp= StrChr(cp, '?');
if (qp) {
*qp++= 0;
nob->fQueryGiven= StrDup(qp);
}
DecodeChars(cp); // ???
switch (prot) {
case DNetOb::kGopherprot: // gopher://host.name:70/00/path/to/data
if (*cp) cp++;
if (*cp == 0) {
nob->fType= kTypeFolder;
nob->StorePath("");
}
else {
nob->fType= *cp;
nob->StorePath( cp+1);
}
break;
case DNetOb::kFTPprot: // file:///path:to:file
case DNetOb::kFileprot: // file:///path:to:file
if (*cp == '/') {
// this leading slash is artifact of silly url syntax !!
// skip for all????? but sometimes (non-local) this is valid
#if 1
if (isLocalhost) cp++;
#else
#ifdef OS_VMS
cp++; // vms, skip leading /
#endif
#ifdef OS_MAC
cp++; // mac, skip leading /
#endif
#ifdef OS_DOS
if (StrChr(cp,':')) cp++; // have "/C:\dos\path" form
else *cp= '\\'; // have a "/dos\path" form
#endif
#endif
}
nob->fType= kTypeFile; // !! need ftp folder handling !?
nob->StorePath( cp);
break;
case DNetOb::kHTTPprot: // http://host.name:80/path/to/data
nob->fType= kTypeHtml;
if (*cp == 0) nob->StorePath( "/");
else nob->StorePath( cp);
break;
case DNetOb::kTelnetprot:
nob->fType= kTypeTelnet;
nob->StorePath( cp);
break;
case DNetOb::kTN3270prot:
nob->fType= kTypeTn3270;
nob->StorePath( cp);
break;
case DNetOb::kFingerprot:
case DNetOb::kWhoisprot:
nob->fType= kTypeQuery;
nob->StorePath( cp);
break;
case DNetOb::kWAISprot: // ?? or need Folder type or Waisdir & WaisDoc types?
default:
nob->fType= kTypeFile;
nob->StorePath( cp);
break;
}
// set a usable title !?
const char *name= nob->GetPath();
if ( name == NULL || *name == 0 || StrCmp(name, "/") == 0)
name= nob->GetHost();
else
name= gFileManager->FilenameFromPath(name);
if ( name && *name != 0 )
nob->StoreName( (char*)name);
}
MemFree( line);
return true;
}